Recently, the CellProfiler team at the Broad Institute announced that pre-release versions of the python-javabridge and python-bioformats packages are available on PyPI. The latter makes it easy to read many biological image formats into numpy arrays, without any of the flailing around with Jython converters that I'd had to do in the past.
Before starting, you'll need the Java development headers on your machine. I imagine that on Linux systems you'd do something like sudo apt-get install default-jdk (or your distribution's equivalent of java-devel), but on OS X you head to Apple's Developer downloads page, which requires your Apple ID, and download a "Java for OS X <version> Developer Package".
Then, simply run pip install python-bioformats, and you'll get both packages!
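As a quick sanity check (this snippet is my own addition, not part of either package), you can confirm from Python that a full JDK, and not just a Java runtime, is visible on your PATH:

import subprocess
subprocess.call(['java', '-version'])    # the runtime; prints its version (to stderr)
subprocess.call(['javac', '-version'])   # the compiler only ships with the JDK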
Next, I follow the examples in the python-bioformats documentation to try to read parts of a massive, 400+GB Leica file without giving my RAM a heart attack.
In [1]:
%matplotlib inline
import javabridge as jv
import bioformats as bf
jv.start_vm(class_path=bf.JARS, max_heap_size="8G")
It is imperative to set max_heap_size to something bigger than the anemic default of 256MB. Otherwise, even reading in a small amount of metadata (in our case, a 27MB XML string) will fail. See the following post for more details.
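To check that the setting actually took effect, you can ask the running JVM for its maximum heap. This is just a sketch of my own using javabridge's static_call and call wrappers, so treat the exact incantation as an assumption rather than anything from the bioformats docs:

runtime = jv.static_call('java/lang/Runtime', 'getRuntime', '()Ljava/lang/Runtime;')
print(jv.call(runtime, 'maxMemory', '()J') / 1e9)  # in GB; should reflect 8G, not the 256MB default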
In [2]:
# Change to the appropriate directory and get the file information
%cd /Volumes/LaCie/Data/zebrafish-lesion
%ls
In [3]:
filename = 'Long time Gfap 260314.lif'
rdr = bf.ImageReader(filename, perform_init=True)
reader = rdr.rdr  # the wrapped Java reader object, for direct metadata calls
print("The number of series is %d." % reader.getSeriesCount())
In the next cell, we measure the size of the metadata block in the file. I'm keeping it here just for future reference.
In [4]:
import numpy as np
with open('Long time Gfap 260314.lif', "rb") as fd:
    fd.read(9)  # skip the fixed-size header preamble
    length = np.frombuffer(fd.read(4), "<i4")  # 4-byte little-endian length field
    print(length[0])
In [5]:
reader.setSeries(0)
print(reader.getMetadataValue('Name'))
Let's try the XML approach again, to see whether the metadata can be read with the new heap size:
In [6]:
md = bf.get_omexml_metadata('Long time Gfap 260314.lif')
It works! Let's see if we can parse it...
In [7]:
print(type(md))
print(len(md))
print(md[:10])
Okay, so we have an XML string. Let's whip out an XML parser to make sense of it.
In [8]:
import xml
import xml.etree.ElementTree as ET
mdroot = ET.fromstring(md)
We figure out the parsing from the xml package documentation. In our case, we are (for now) only interested in the 'Name' attribute of each Image child of the root:
In [9]:
print([(a.tag, a.attrib) for a in mdroot[300]])
In [10]:
image_names = []
sizes_tzyxc = []
res_zyx = []
size_tags = ['Size' + c for c in ['T', 'Z', 'Y', 'X', 'C']]
res_tags = ['PhysicalSize' + c for c in ['Z', 'Y', 'X']]
for child in mdroot:
    if child.tag.endswith('Image'):
        image_names.append(child.attrib['Name'])
        for grandchild in child:
            if grandchild.tag.endswith('Pixels'):
                att = grandchild.attrib
                sizes_tzyxc.append(tuple([int(att[t]) for t in size_tags]))
                res_zyx.append(tuple([float(att[t]) for t in res_tags]))
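As an aside, ElementTree can also match the namespace explicitly instead of relying on tag.endswith. This is just a sketch of my own; it pulls the namespace URI out of the root tag itself so that no particular OME schema version is hardcoded:

ns = {'ome': mdroot.tag.split('}')[0].strip('{')}  # namespace URI recovered from the root tag
images = mdroot.findall('ome:Image', ns)
print(len(images) == len(image_names))  # should be True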
In [11]:
sizes_tzyxc[45]
Out[11]:
In [12]:
for name, size in zip(image_names, sizes_tzyxc):
    print(name + ' ' + str(size))
In [15]:
len(np.arange(0, 17, 0.5))
Out[15]:
In [19]:
idx0 = 49
size0 = sizes_tzyxc[idx0]
nt, nz = size0[:2]
image5d0 = np.zeros(size0, np.uint8)
reader.setSeries(idx0)
for t in range(nt):
    for z in range(nz):
        # fill the 5D array one (t, z) plane at a time, rather than all at once
        image5d0[t, z] = rdr.read(z=z, t=t, series=idx0, rescale=False)
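Just to keep the earlier RAM worries honest (a back-of-the-envelope check of my own), the in-memory size of this one series is simply the product of its dimensions, one byte per voxel for uint8:

nbytes = np.prod(size0)  # T * Z * Y * X * C voxels, one byte each
print('%.2f GB' % (nbytes / 1e9))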
In [23]:
import vis
vis.cshow(image5d0[..., 0]) # success!
Out[23]:
In [24]:
import sys; sys.path.append('/Users/nuneziglesiasj/projects/lesion/')
In [25]:
squished = image5d0.sum(axis=1)
In [27]:
squished = squished[..., 0]
squished.shape
Out[27]:
In [28]:
from lesion import trace
profiles = [trace.trace_profile(img) for img in squished]
In [ ]:
from matplotlib import pyplot as plt
plt.imshow(squished[-1])
In [66]:
def read_image(rdr, idx, sizes):
    """Read series `idx` into a 4D (t, z, y, x) uint8 array, one plane at a time."""
    size = sizes[idx]
    nt, nz = size[:2]
    image = np.zeros(size[:-1], np.uint8)  # drop the channel axis; we only read c=0
    for t in range(nt):
        for z in range(nz):
            image[t, z] = rdr.read(z=z, t=t, c=0, series=idx, rescale=False)
    return image
In [35]:
reader.setSeries(0)
print(sizes_tzyxc[0])
reader.getSizeZ()
Out[35]:
In [68]:
im = read_image(rdr, 0, sizes_tzyxc)
im.shape
Out[68]:
In [61]:
def parse_name(name):
    """Extract the stage position and the time points encoded in a series name."""
    pstart = name.find('Pos') + 3
    pend = pstart + 3
    position = int(name[pstart:pend])
    if name.startswith('Pre'):
        times = np.array([-1.0])
    elif name.startswith('Mark'):
        times = np.array([np.nan])
    else:
        tstart = 0
        tend = min(name.find(' '), name.find('h'))
        t0 = float(name[tstart:tend])
        tstart = name.find('to ') + 3
        tend = name[tstart:].find('h')
        t1 = float(name[tstart:tstart + tend])
        times = np.arange(t0, t1 + 0.01, 0.5)
    return position, times
In [60]:
name = image_names[100]
tstart = 0
tend = min(name.find(' '), name.find('h'))
t0 = float(name[tstart:tend])
tstart = name.find('to ') + 3
tend = name[tstart:].find('h')
tend
print(name, name[tstart:tstart+tend])
In [63]:
postimes = map(parse_name, image_names)  # Python 2 map returns a list; use list(map(...)) on Python 3
In [69]:
def traces_from_image(imt):
    try:
        return [trace.trace_profile(im) for im in imt.sum(axis=1)]
    except:
        return None
In [75]:
imt = read_image(rdr, 100, sizes_tzyxc)
trs = traces_from_image(imt)
In [87]:
postimes[100]
Out[87]:
In [84]:
squished = imt.sum(axis=1)
In [ ]:
plt.plot(trs[-2])
In [ ]:
vis.cshow(squished[-2])
In [ ]:
prog = [np.min(a) / np.max(a) for a in trs]
plt.plot(prog)
In [88]:
all_traces = []
for series_idx in range(len(image_names)):
try:
imt = read_image(rdr, series_idx, sizes_tzyxc)
trs = traces_from_image(imt)
except:
trs = None
all_traces.append(trs)
In [90]:
len([x for x in all_traces if x is None])
Out[90]:
In [92]:
import cPickle as pck
with open('Gfap-traces.pck', 'wb') as trs_file:  # binary mode for a binary pickle protocol
    pck.dump((postimes, all_traces), trs_file, protocol=-1)
In [93]:
def min_over_max(tr):
    return tr.min() / tr.max()
In [94]:
unique_positions = len(np.unique([pos for pos, time in postimes]))
unique_positions
Out[94]:
In [99]:
data = [None] * (unique_positions + 1)
for (pos, times), traces in zip(postimes, all_traces):
    if traces is None:  # skip series for which tracing failed
        continue
    stats = np.array(map(min_over_max, traces))
    xy = np.vstack((times[:len(stats)], stats))
    if data[pos] is None:
        data[pos] = xy
    else:
        data[pos] = np.hstack((data[pos], xy))
In [ ]:
ex = data[2]
sel = ~np.isnan(ex.sum(axis=0))
plt.plot(ex[0, sel], ex[1, sel])
In [128]:
import os
def make_plot(position, xydata, fn=os.path.expanduser('~/Desktop/fig.png')):
    fig = plt.figure(figsize=(7, 5), dpi=600)
    sel = ~np.isnan(xydata.sum(axis=0))
    times, ratios = xydata[:, sel]
    plt.plot(times, ratios, lw=2, c='k')
    for i in range(len(times) - 1):
        diff = times[i + 1] - times[i]
        if diff > 0.5:  # shade gaps in the imaging timeline
            print(times[i], times[i + 1])
            plt.fill_between((times[i] + 0.25, times[i + 1] - 0.25), (1, 1),
                             color='k', alpha=0.3)
    plt.xlim(-2, times.max() + 1)
    plt.ylim(max(0, ratios.min() - 0.1), min(1, ratios.max() + 0.1))
    plt.title('Position ' + str(position))
    plt.savefig(fn, dpi=600)
    plt.show()
In [ ]:
make_plot(1, data[1])
In [ ]:
for position in range(1, len(data)):
    make_plot(position, data[position], fn='plots/position%02i.png' % position)
In [ ]:
# Kill the VM at the end! I actually edited this first
# because it's easy to forget!
jv.kill_vm()